* Author: Anina Schwarzenbach
* generated October 2014
* last review August 2020

* Session settings: do not pause output, and point the PERSONAL ado directory
* at the project's ado folder so ado-files stored there are found.
set more off
sysdir set PERSONAL "../../../ados"

* Load the input dataset, replacing whatever is currently in memory.
use "../../STATAfiles/FR_POLIS_31juillet14_FINAL&IRIS_pcnt.dta", clear


*******************************************************************************************
*cleaning of migration variable + building of ethnicity variables FRANCE
******************************************************************************************

****** STEP 1 : RECODE THE 7 BIRTH-COUNTRY ITEMS (1 KID, 2 PARENTS, 4 GRANDPARENTS) *******

* Input items used to build the ethnic background variables:
* R2PAYSQ3 (kid), R2PAYSQ4S1/S2 (parents), R2PAYSQ5S1/S2 and R2PAYSQ6S1/S2 (grandparents).
* NOTE: this section uses a local macro, so run it as part of the whole do-file.
local birthitems R2PAYSQ3 R2PAYSQ4S1 R2PAYSQ4S2 R2PAYSQ5S1 R2PAYSQ5S2 R2PAYSQ6S1 R2PAYSQ6S2

* Frequencies of the raw items
fre `birthitems'

* Create a recoded copy <item>r of every item: codes 2-8 are shifted down by one
* (so raw codes 1 and 2 both end up as 1); every other value is carried over unchanged.
foreach item of varlist `birthitems' {
	recode `item' (2=1) (3=2) (4=3) (5=4) (6=5) (7=6) (8=7), generate(`item'r)
}

* Spot check: recoded vs. raw distribution for the kid and the first parent item
tabulate R2PAYSQ3r
tabulate R2PAYSQ3

tabulate R2PAYSQ4S1r
tabulate R2PAYSQ4S1


********* ********* ********* ********* ********* ********* ********* ******** ********** ********* ********* ********* ********* ********* 

**** STEP 2: COUNT VARIABLES FOR EACH OF THE 7 REGION CODES (PLUS NON-RESPONSE) *****

* For every region code, count how many of the recoded items carry that code,
* separately for the kid (1 item), the parents (2 items) and the grandparents (4 items).
* Region codes of the recoded items:
*	1 "France", 2 "Europe", 3 "Maghreb", 4 "middle East",
*	5 "Rest of Africa", 6 "Rest of Asia", 7 "Rest of World", 99 "NR"
* NOTE: this section uses local macros, so run it as part of the whole do-file.

local regionnames `""France" "Europe" "Maghreb" "middle East" "Rest of Africa" "Rest of Asia" "Rest of World""'
local paritems R2PAYSQ4S1r R2PAYSQ4S2r
local gpitems  R2PAYSQ5S1r R2PAYSQ5S2r R2PAYSQ6S1r R2PAYSQ6S2r

forvalues code = 1/7 {
	* name of the region that belongs to the current code
	local region : word `code' of `regionnames'

	egen ethnkidFR_`code' = anycount(R2PAYSQ3r), values(`code')
	egen ethnparFR_`code' = anycount(`paritems'), values(`code')
	egen ethngpFR_`code'  = anycount(`gpitems'), values(`code')

	label variable ethnkidFR_`code' "FR kid `region'"
	label variable ethnparFR_`code' "FR parents `region'"
	label variable ethngpFR_`code'  "FR grandparents `region'"
}

* Same counts for the non-response code 99
egen ethnkidFR_miss = anycount(R2PAYSQ3r), values(99)
egen ethnparFR_miss = anycount(`paritems'), values(99)
egen ethngpFR_miss  = anycount(`gpitems'), values(99)

label variable ethnkidFR_miss "FR kid missing"
label variable ethnparFR_miss "FR parents missing"
label variable ethngpFR_miss  "FR grandparents missing"

* Distribution of every count variable, missing values included
foreach cntvar of varlist ethnkidFR_* ethnparFR_* ethngpFR_* {
	tabulate `cntvar', missing
}
 
********* ********* ********* ********* ********* ********* ********* ******** ********** ********* ********* ********* ********* ********* 
	
**** STEP 3: GENERATE ETHNICITY VARIABLE*****
* Build ethn1FR from the parent counts (ethnparFR_1-7, ethnparFR_miss) and the
* grandparent counts (ethngpFR_1-7, ethngpFR_miss) created in step 2.
* The rules form a cascade: apart from Rule 1, every replace is guarded by
* ethn1FR==. , so the first rule that matches an observation decides its code.
* Working codes used while the rules run (9 categories plus missing); the recode
* that follows this section shifts 1-9 down to 0-8:
	**1 "France (incl. overseas territories)" 
	**2 "Europe" 
	**3 "Maghreb" 
	**4 "Middle East" 
	**5 "Rest of Africa" 
	**6 "Rest of Asia" 
	**7 "Rest of World" 
	**8 "Mixed France/Maghreb" 
	**9 "Mixed France/other"  
	**99 "Missing"
* NOTE(review): the step-2 counts only treat code 99 as missing. An item that is
* system-missing (.) is counted neither as a region nor as missing, so e.g. two
* parent items equal to . satisfy ethnparFR_1==0 & ethnparFR_miss==0 and would be
* coded 7 below - confirm the input items contain no system-missing values.

gen ethn1FR=.



* RULE 1: if both parents have the same foreign background (code 2-7), the kid
* gets that background as well (grandparent information is not considered)

forvalues i=2/7 {
	replace ethn1FR=`i' if ethnparFR_`i'==2
}

* parents are of mixed foreign background: no parent coded France, no parent
* coded 99, and no single foreign code shared by both -> code 7
replace ethn1FR=7 if ethn1FR==. & ethnparFR_1==0 & ethnparFR_miss==0

*fre ethn1FR

* RULE 2: otherwise the ethnicity is defined by having at least three
* grandparents of the same background (code 1 = France included)

forvalues i=1/7 {
           replace ethn1FR=`i' if ethn1FR==. & ethngpFR_`i'>=3
}


* at least three grandparents of foreign background, but not three of the same
* code: exactly one coded France and none 99, or none coded France and at most
* one 99 -> code 7
replace ethn1FR=7 if ethn1FR==. & ethngpFR_1 ==1 & ethngpFR_miss==0
replace ethn1FR=7 if ethn1FR==. & ethngpFR_1 ==0 & ethngpFR_miss<=1
*fre ethn1FR

* RULE 3: mixed ethnicity if two grandparents (or one parent) are native and
* two grandparents (or one parent) are foreign

* Mixed France/Maghreb (code 8): the foreign side is code 3
replace ethn1FR=8 if  ethn1FR==. & ethngpFR_1==2 & ethngpFR_3==2
replace ethn1FR=8 if  ethn1FR==. & (ethnparFR_1==1 & ethngpFR_1<=2) & ethngpFR_3==2
replace ethn1FR=8 if  ethn1FR==. & ethngpFR_1==2 & (ethnparFR_3==1 & ethngpFR_3<=2)
replace ethn1FR=8 if  ethn1FR==. & ethnparFR_1==1 & ethnparFR_3==1

* same pattern when part of the grandparent information is coded 99
replace ethn1FR=8 if  ethn1FR==. & ethnparFR_1==2 & ethngpFR_3==2 & ethngpFR_miss==2
replace ethn1FR=8 if  ethn1FR==. & ethngpFR_1==1 &  ethngpFR_3==2 & ethngpFR_miss==1 


* Mixed France/other (code 9): same patterns for every foreign code 2-7.
* The code-3 patterns were already assigned 8 above, so the ethn1FR==. guard
* keeps them out here. The two replaces that do not refer to the loop index
* catch native/foreign mixes whose foreign side is spread over several codes;
* because every line assigns the same value 9, their position inside the loop
* does not affect the result.
forvalues i=2/7 {
			replace ethn1FR=9 if  ethn1FR==. & ethngpFR_1==2 & ethngpFR_`i'==2
			replace ethn1FR=9 if  ethn1FR==. & (ethnparFR_1==1 & ethngpFR_1<=2) & ethngpFR_`i'==2
			replace ethn1FR=9 if  ethn1FR==. & ethngpFR_1==2 & (ethnparFR_`i'==1 & ethngpFR_`i'<=2)
			replace ethn1FR=9 if  ethn1FR==. & ethnparFR_1==1 & ethnparFR_`i'==1
			
			replace ethn1FR=9 if  ethn1FR==. & ethnparFR_1==2 & ethngpFR_`i'==2 & ethngpFR_miss==2
			replace ethn1FR=9 if  ethn1FR==. & ethngpFR_1==2 & ethngpFR_miss==0
			replace ethn1FR=9 if  ethn1FR==. & ethngpFR_1==1 & ethngpFR_`i'==2 & ethngpFR_miss==1 
			replace ethn1FR=9 if  ethn1FR==. & ethngpFR_1==1 & ethngpFR_miss==1
}
*fre ethn1FR

* RULE 4a: native if no earlier rule applied (grandparent information is not
* sufficient) and one parent is native and the other is native or unknown (99)
replace ethn1FR=1 if ethn1FR==. & ethnparFR_1==2  
replace ethn1FR=1 if ethn1FR==. & ethnparFR_1==1 & ethnparFR_miss==1 
*replace ethn1FR=1 if ethn1FR==. & ethngpFR_1==2 & ethnparFR_miss==2 & ethngpFR_miss==2 

* RULE 4b: foreign if no earlier rule applied and one parent is foreign
* (code 2-7) and the other is unknown (99)
forvalues i=2/7 {
			replace ethn1FR=`i' if  ethn1FR==. & ethnparFR_`i'==1 & ethnparFR_miss==1 
			*replace ethn1FR=`i' if  ethn1FR==. & ethngpFR_`i'==2 & ethnparFR_miss==2 & ethngpFR_miss==2
}

* mixed foreign parents (disabled; the same replace already runs under Rule 1)
*replace ethn1FR=7 if ethn1FR==. & ethnparFR_1==0 & ethnparFR_miss==0 	

* RULE 5: missing if both parents are unknown and two or more grandparents are
* unknown. The second replace then sets every observation that is still
* unclassified to 99 as well, so it subsumes the first one.
replace ethn1FR=99 if ethn1FR==. & ethnparFR_miss==2 & ethngpFR_miss >=2
replace ethn1FR=99 if ethn1FR==.

fre ethn1FR

* Recode ethn1FR so that 0 = native: the working codes 1-9 become 0-8, 99 stays.
*	0 = native (France), 1 = Europe, 2 = Maghreb, 3 = Middle East, 4 = Rest of Africa,
*	5 = Rest of Asia, 6 = Rest of World, 7 = mixed France/Maghreb, 8 = mixed France/other,
*	99 = missing

recode ethn1FR (1=0) (2=1) (3=2) (4=3) (5=4) (6=5) (7=6) (8=7) (9=8)

label define ethn1FRlb 0 "France" 1 "Europe" 2 "Maghreb" 3 "Middle East" ///
	4 "Rest of Africa" 5 "Rest of Asia" 6 "Rest of World" ///
	7 "Mixed France/Maghreb" 8 "Mixed France/other" 99 "Missing", replace
label val ethn1FR ethn1FRlb
label var ethn1FR "ethnicity 9 cat"
tab ethn1FR, m

fre ethn1FR

* Summarized ethnicity variables, all derived from the recoded ethn1FR

* ethn2FR (7 categories): Middle East + Rest of Africa -> 3, Rest of Asia + Rest of
* World -> 4, the two mixed categories move up to 5 and 6; 0, 1, 2 and 99 are kept.
recode ethn1FR (4=3) (5 6=4) (7=5) (8=6), gen(ethn2FR)
label define ethn2FRlb 0 "France" 1 "Europe" 2 "Maghreb" 3 "Middle East and Africa" ///
	4 "Asia and World" 5 "Mixed France/Maghreb" 6 "Mixed France/other" 99 "Missing", replace
label val ethn2FR ethn2FRlb
label var ethn2FR "ethnicity 7 cat"
tab ethn2FR, m


* ethn3FR (5 categories): both mixed categories are counted as France (0).
recode ethn2FR (5/6=0), gen(ethn3FR)
label define ethn3FRlb 0 "France" 1 "Europe" 2 "Maghreb" 3 "Middle East and Africa" ///
	4 "Asia and World" 99 "Missing", replace
label val ethn3FR ethn3FRlb
label var ethn3FR "ethnicity 5 cat"
tab ethn3FR


* ethn1FRd (dummy): 0 = France, 1 = any migration background (codes 1-8), 99 = missing.
* Code 1 (Europe) already equals 1; it is listed in the rule only to make the
* mapping explicit. The variable label is set explicitly so that the variable
* does not keep the automatic "RECODE of ..." label, in line with ethn2FR/ethn3FR.
recode ethn1FR (1/8=1), gen(ethn1FRd)
label define ethn1FRdlb 0 "France" 1 "migration background" 99 "Missing", replace
label val ethn1FRd ethn1FRdlb
label var ethn1FRd "ethnicity 2 cat (native vs migration background)"
tab ethn1FRd



********* ********* ********* ********* ********* ********* ********* ******** ********** ********* ********* ********* ********* ********* 

**** STEP 4: GENERATE GENERATION VARIABLE*****

* Conceptual definition of the 1st, 2nd and 3rd generation:

* 1. GENERATION : kid born abroad, both parents born abroad

* 1 1/2 . GENERATION : kid immigrated < 5 years age, both parents born abroad

* 2. GENERATION : 
	*kid born in france, both parents born abroad
	*kid born abroad but immigrated >= 5 years age, at least one parent born in france

* 3. GENERATION : 
	* kid born in france, at least one parent born in france
	* kid born abroad but immigrated < 5 years age, at least one parent born in france

* NOTE(review): the code below splits kids not born in France only by Q3age
* (presumably the age at immigration - confirm), regardless of where the parents
* were born; the parent-based sub-cases listed above for kids born abroad are
* not implemented separately.

* generat1FR: detailed classification (codes 0-8, 99) built from the recoded
* ethnicity (ethn1FR: 0 = native, 99 = missing) and the number of relatives
* coded France (ethnkidFR_1, ethnparFR_1, ethngpFR_1).
gen generat1FR=.

replace generat1FR=99 if ethn1FR==99
replace generat1FR=0 if ethn1FR==0

* Kid not coded as born in France. Numeric missing sorts above every number in
* Stata, so Q3age >= 5 already includes a missing age; the explicit test only
* documents that intent.
* NOTE(review): ethnkidFR_1==0 is also true when the kid's own birth country is
* unknown, so such kids are classified as foreign-born here - confirm intended.
replace generat1FR=1 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==0 & (Q3age >= 5 | Q3age==.)
replace generat1FR=2 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==0 & Q3age < 5

* Kid born in France: split by number of native parents (0/1/2) and by whether
* fewer than two grandparents are native.
replace generat1FR=3 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==1 & ethnparFR_1==0 & ethngpFR_1 <2
replace generat1FR=4 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==1 & ethnparFR_1==0 & ethngpFR_1 >=2
replace generat1FR=5 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==1 & ethnparFR_1==1 & ethngpFR_1 <2
replace generat1FR=6 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==1 & ethnparFR_1==1 & ethngpFR_1 ==2
replace generat1FR=7 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==1 & ethnparFR_1==2 & ethngpFR_1 <2
replace generat1FR=8 if ethn1FR>0 & ethn1FR!=99 & ethnkidFR_1==1 & ethnparFR_1==2 & ethngpFR_1 ==2

fre generat1FR

label var generat1FR "immigration generation - large"

* Value label 2 previously read "immigrated < yrs age"; the missing threshold
* (5, matching the Q3age < 5 condition above) has been restored.
label define generat1FRlb ///
0 "no immigration" ///
1 "kid foreign: immigrated >=5 yrs age or no information about age of immigration" ///
2 "kid foreign: immigrated < 5 yrs age" ///
3 "kid native: parents foreign, less than two gp native" ///
4 "kid native: parents foreign, two or more gp native" ///
5 "kid native: one parent native, less than two gp native" ///
6 "kid native: one parent native, two gp native" ///
7 "kid native: two parents native, less than two gp native" ///
8 "kid native: two parents native, two gp native" ///
99 "missing", replace
label value generat1FR generat1FRlb
fre generat1FR

* generat2FR: short version that keeps the mixed group separate.
* 3 and 4 -> 3 (2. generation), 7 and 8 -> 4 (3. generation); 0, 1, 2 and 99 are
* kept; 5 and 6 are resolved by the replaces below.
recode generat1FR (3 4=3) (7 8=4), gen(generat2FR)
label var generat2FR "immigration generation - short mixed separate"

* One native parent, fewer than two native grandparents: 2. generation unless
* the ethnicity is one of the mixed categories (ethn1FR 7 or 8).
* The original condition (ethn1FR!=7 | ethn1FR!=8) was true for every
* observation and only gave the right result because the next replace
* overwrote the mixed cases; the condition now states the intended subset itself.
replace generat2FR=3 if generat1FR==5 & !inlist(ethn1FR, 7, 8)

* mixed generation trunk
replace generat2FR=5 if generat1FR==5 & inlist(ethn1FR, 7, 8)
replace generat2FR=5 if generat1FR==6

label define generat2FRlb ///
0 "no immigration" ///
1 "1. generation, kid immigrated >=5 yrs age or no information about age of immigration" ///
2 "1. generation, kid immigrated < 5 yrs age" ///
3 "2. generation" ///
4 "3. generation" ///
5 "1 parent foreign + 2 gp foreign, 1 parent native + 2 gp native" ///
99 "missing", replace

label value generat2FR generat2FRlb

fre generat2FR

* generat3FR: short version with the mixed group (5) merged into code 4.
* NOTE(review): the variable label mentions "mixed 2. generation" while code 4
* is labelled "3. generation" - confirm which wording is intended.
recode generat2FR (5=4), gen(generat3FR)
label var generat3FR "immigration generation - short mixed 2. generation"
label define generat3FRlb ///
0 "no immigration" ///
1 "1. generation, kid immigrated >=5 yrs age or no information about age of immigration" ///
2 "1. generation, kid immigrated < 5 yrs age" ///
3 "2. generation" ///
4 "3. generation" ///
99 "missing", replace
label value generat3FR generat3FRlb



* Save the dataset with the new ethnicity and generation variables,
* overwriting an existing output file of the same name.
* NOTE: uses a local macro, so run both lines together.
local outfile "../../STATAfiles/FR_POLIS_31juillet14_FINAL&IRIS_mb.dta"
save "`outfile'", replace


